#################################################################################### 
####################################################################################
####################################################################################
# Analyze HH-Level Data
####################################################################################
####################################################################################
library(here)
library(kableExtra)
library(RItools)
####################################################################################
####################################################################################
## Read in survey data (from: analysis_balance.R)
dat = readRDS(here("data/dat_unrestricted.rds"))

# Implement SWE of ATE: Demean all covariates, fully interact
cols = c("sex","age","education_level","Arua","Bushenyi","Ibanda","Jinja","Mbale","Mpigi","Pallisa","Mbarara","Shema")
dat[, paste0("c_", cols)] = lapply(dat[, cols], function(x) (x - mean(x))  )
covariates = c(#"sex + age + education_level + Arua + Bushenyi + Ibanda + Jinja + Mbale + Mpigi + Pallisa + Mbarara",
  #"Arua + Bushenyi + Ibanda + Jinja + Mbale + Mpigi + Pallisa + Mbarara",
  "treatment*c_sex + c_age*treatment + c_education_level*treatment + c_Arua*treatment + c_Bushenyi*treatment + c_Ibanda*treatment + c_Jinja*treatment + c_Mbale*treatment + c_Mpigi*treatment + c_Pallisa*treatment + c_Mbarara*treatment", 
  "c_Arua*treatment + c_Bushenyi*treatment + c_Ibanda*treatment + c_Jinja*treatment + c_Mbale*treatment + c_Mpigi*treatment + c_Pallisa*treatment + c_Mbarara*treatment")

####################################################################################
####################################################################################
## Table D.7

## Household
# Omnibus test (https://egap.org/methods-guides/10-things-you-need-know-about-cluster-randomization; Hansen, Bowers 2008)
model = formula("treatment ~ sex + education_level + age + living_conditions + living_in_village + nadk")
balance_test = xBalance(model,
                        strata = factor(dat$districtname), #list(noblocks=NULL)
                        #cluster = factor(dat$village_id), 
                        data = dat, na.rm = T,
                        report=c("adj.means","adj.mean.diffs","z.scores","chisquare.test","p.values"))
rownames(balance_test$results) = c("Gender","Education (1-6)","Age", "Living Conditions","Time in Village", "DK/R")
rownames(balance_test$overall) = c("Overall Test Statistic")

kable(round(balance_test$results,2),align = "c", booktabs = T, caption = "Household-Level Block-Adjusted Omnibus Balance Test", 
      col.names = c("Control Mean", "Treatment Mean", "Difference","Z-score","P-value"), format = "latex")
kable(round(balance_test$overall,2), align = "c", row.names = T,
      col.names = c("Chi-Sq", "Df", "P-value"), format = "latex")

# Joint Orthogonality Test (https://blogs.worldbank.org/impactevaluations/tools-trade-joint-test-orthogonality-when-testing-balance)
for (i in names(dat[c("treatment")])){ # Dependent Variables
  model = paste(i,"~","sex + education_level + age + living_conditions + living_in_village + nadk")
  # Run each model
  assign(x = paste("m",i, sep = "."), 
         value = lm(as.formula(model), data = dat))
  # Output clustered SEs (county)
  assign(x = paste("c",i, sep = "."), 
         value = coeftest(lm(as.formula(model), data = dat),
                          cluster.vcov(lm(as.formula(model), data = dat), dat$village_id)))
}

lapply(grep("^c\\.", ls(), value = T), get)
rm(list = grep("^(c|m)\\.", ls(), value = T))
####################################################################################